// =================
// Prep Age Dataset
// =================

// Load the inventor-to-census match file (one row per matched inventor record).
import delimited using "./input/inventors_census_matches/inventors_age_bpl.csv", clear

// Inspect the age distribution before building the age splits.
// Author note: 39 is the median age across all patents, and this file is
// already limited to 1915-1925.
summarize age, detail

// Identifier columns are not used later in this script; drop them to keep the tempfile small.
drop inv_name mpcid
compress

// Stash the age data for the joinby steps further down.
tempfile ages
save "`ages'", replace

// =================
// Patent Classes, CLS cities 
// =================

// Load the CPC category file and reduce it to one row per patent number so it
// can be the using side of an m:1 merge on patnum.
// The path uses forward slashes, like every other path in this script: Stata
// accepts "/" on all platforms, whereas "\" only works on Windows.
import 	delimited "./input/CUSP/patents_cpc_categories_from_1900.csv", clear

// NOTE(review): with the force option, whichever row comes first for a patnum is
// kept and the rest are discarded; confirm that the choice of row does not matter.
duplicates drop patnum,  force

tempfile cpc
save	"`cpc'", replace
clear

// Combine Dates/Inventors and Collapse by Month-Year-City, once per city group.
//
// Two patent lists are processed with identical steps:
//   cls : ./output/intermediate/citiescls_patents_list  (id variable cls_id)
//   npi : ./output/intermediate/cities_patents_list     (id variable npi_id)
// The cls result is saved to the tempfile cls_cities_age; the npi result is
// left in memory for the append step that follows this block.
// Both groups run through the same loop body so the age split is defined in
// exactly one place and the two panels cannot drift apart.

local patlist_cls "./output/intermediate/citiescls_patents_list"
local patlist_npi "./output/intermediate/cities_patents_list"

tempfile cls_cities_age

foreach grp in cls npi {

	// Id variable of the current group: cls_id or npi_id.
	local idvar `grp'_id

	use 	"`patlist_`grp''", clear

	merge	m:1 patnum using "`cpc'"
	/* _merge=1: international patents
	   _merge=2: outside date range */
	drop if _merge==2
	drop 	_merge

	if "`grp'" == "cls" {
		// Ensure merge on cls_id (useful) and removes many irrelevant obs (faster)
		drop if cls_id<400
	}

	// Attach inventor ages; a patent can pair with several age rows, hence joinby.
	/* _merge=1: Not in age data (age already restricted to just f_yr 1915-1925)
	   _merge=2: Outside date range etc */
	joinby patnum using "`ages'", _merge(_merge) unm(both)
	keep if _merge==3
	drop _merge

	** THIS IS WHERE TO EDIT AGES, ETC
	// Split at 40 so the buckets match their names: overeq40 is age >= 40 and
	// under40 is age < 40. Previously the tests were age>40 and age<=40, which
	// put 40-year-olds in the under-40 bucket.
	// Rows with missing age get missing values here, which collapse (sum) ignores.
	// NOTE(review): weight and pat_wtd_inv are assumed to arrive from the merged
	// inputs; confirm which file supplies each.
	gen pat_overeq40   = pat_wtd_inv*weight*(age>=40) if !mi(age)
	gen pat_under40    = pat_wtd_inv*weight*(age<40)  if !mi(age)
	gen pat_wtd_invadj = weight*pat_wtd_inv

	// Collapse to one row per month-year-city.
	collapse (sum) pat_wtd_inv pat_wtd_invadj pat_overeq40 pat_under40, by(f_m f_yr `idvar')
	compress

	sort 	f_yr f_m `idvar'

	// Monthly date built from the year and month components.
	gen 	f_myr = ym(f_yr, f_m)
	format 	f_myr %tm

	keep 	f_myr `idvar' pat_*

	if "`grp'" == "cls" {
		save 	"`cls_cities_age'", replace
	}
}

// Stack the CLS-city panel underneath the NPI-city panel currently in memory.
append using "`cls_cities_age'"

// Single city identifier: npi_id where present, otherwise cls_id.
gen master_id = cond(mi(npi_id), cls_id, npi_id)

// From the CLS rows keep only the seven listed cities; rows with no cls_id
// (the NPI rows) are always kept.
drop if !inlist(master_id, 470, 596, 724, 780, 790, 794, 872) & !mi(cls_id)

// Relabel ids 43-49 with the corresponding CLS ids.
// NOTE(review): if a remapped id and the matching CLS id both have a row in the
// same month, xtset below will stop on repeated time values; confirm that ids
// 43-49 and the seven CLS ids never coexist for the same city-month.
recode master_id (43=470) (44=596) (45=724) (46=780) (47=790) (48=794) (49=872)

drop cls_id npi_id

// Declare the panel and insert the missing city-month rows so that it is balanced.
xtset master_id f_myr
tsfill, full

// City-months without any patent rows count as zero rather than missing.
foreach v of varlist pat_* {
	replace `v' = 0 if mi(`v')
}

// Restrict to the window January 1900 through December 1929.
keep if tin(1900m1, 1929m12)

order master_id f_myr, first

compress
save "./output/intermediate/citiesall_patents_age.dta", replace

clear

